Giotto Setup

Giotto results are placed in the directory “/data/Giotto_AD_codes/analysis/”, data is read from the directory “/data/Giotto_AD_codes/analysis/” Input raw expression data was downloaded from GEO data set GSE152506. Four spatial transcriptomics samples were extracted and bundled together (Two ALzheimer’s model samples: N04_D1 and N05_C2, and two WT samples: B04_D1 and B05_D2). Their spatial coordinates were shifted so that they can be analyzed side by side. Only protein coding genes were included. Gene names were put in upper case because of the format of cell type enrichment reference data. The compiled data for this tutorial can be also found at: https://www.synapse.org/#!Synapse:syn26484805. Cells / spots / spatial spots are sometimes used interchangeably

rm(list = ls())
library(data.table)
library(Giotto)
packageVersion("Giotto") 
## [1] '1.0.4'
## Giotto output is saved in the results_folder
results_folder = "/data/Giotto_AD_codes/analysis/"
dir.create(results_folder)
## Warning in dir.create(results_folder): '/data/Giotto_AD_codes/analysis' already
## exists
## Directory where data is stored
data_path="/data/Giotto_AD_codes/ST_data/"

## Specified Python path is optional, otehrwise the default Python version will be used. 
python_path = NULL
if (F) python_path = "/usr/local/bin/python3"

## Giotto instructions
instrs = createGiottoInstructions(save_dir = results_folder,
                                  save_plot = TRUE,
                                  show_plot = TRUE,
                                  python_path = python_path)
## 
##  no external python path or giotto environment was specified, will check if a default python path is available 
## 
##  A default python path was found:  /usr/bin/python3  and will be used
## 
##  If this is not the correct python path, either
## 
##  1. use installGiottoEnvironment() to install a local miniconda python environment along with required modules 
## 
##  2. provide an existing python path to python_path to use your own python path which has all modules installed
## Read in the expression and spatial position data and create a Giotto object
gg1 <- createGiottoObject(raw_exprs = paste0(data_path,"raw_expression_countsUC.txt"),
                          spatial_locs =  paste0(data_path,"spot_locations.txt"),
                          instructions = instrs)
## Consider to install these (optional) packages to run all possible Giotto commands for spatial analyses:  RTriangle FactoMiner
##  Giotto does not automatically install all these packages as they are not absolutely required and this reduces the number of dependencies
## Warning in evaluate_spatial_locations(spatial_locs = spatial_locs, cores = cores, : There are non numeric or integer columns for the spatial location input at column position(s): 1
##  The first non-numeric column will be considered as a cell ID to test for consistency with the expression matrix
##  Other non numeric columns will be removed
## Positions of spatial transctiptomic spots
spatPlot(gobject = gg1,  point_alpha = 0.7, save_param = list(save_name = '2_a_spatplot_image'), point_size=2.5)

## Show cell (spatial spot) metadata
pDataDT(gg1)
##             cell_ID
##    1: B05_D2__20_25
##    2:  B05_D2__16_9
##    3:  B05_D2__6_28
##    4:  B05_D2__19_4
##    5: B05_D2__17_20
##   ---              
## 1994: N04_D1__12_26
## 1995: N04_D1__27_18
## 1996: N04_D1__13_27
## 1997: N04_D1__28_21
## 1998: N04_D1__19_30
## Plot the distributions of gene and cell counts in order to determine the optimal filtering parameters
filterDistributions(gg1, detection = 'genes', save_param = list(save_name="1_a_filterDistributions_genes"))

filterDistributions(gg1, detection = 'cells', save_param = list(save_name="1_b_filterDistributions_spots"))

filterCombinations(gg1,
                   expression_thresholds = c(1),
                   gene_det_in_min_cells = c(15, 20, 25, 30),
                   min_det_genes_per_cell = c(300, 300, 300, 300),
                   save_param = list(save_name="1_c_filterCombinations"))

## $results
##    threshold gene_detected_in_min_cells min_detected_genes_per_cell combination
## 1:         1                         15                         300      15-300
## 2:         1                         20                         300      20-300
## 3:         1                         25                         300      25-300
## 4:         1                         30                         300      30-300
##    removed_genes removed_cells
## 1:          4548             0
## 2:          5179             0
## 3:          5632             0
## 4:          5996             0
## 
## $ggplot

## Apply filtering
gg1 <- filterGiotto(gobject = gg1,
                    expression_threshold = 1,
                    gene_det_in_min_cells = 20,
                    min_det_genes_per_cell = 300,
                    expression_values = c('raw'),
                    verbose = T)
## Number of cells removed:  0  out of  1998 
## Number of genes removed:  5179  out of  20249
## Normalization of expression data
gg1 <- normalizeGiotto(gobject = gg1, scalefactor = 6000, verbose = T)
## 
##  first scale genes and then cells
## Add statistical info to metadata
gg1 <- addStatistics(gobject = gg1)


## Plot the distribution of gene counts per spot
spatPlot2D(gobject = gg1, point_alpha = 0.7, point_size=2.5, show_image=F,
           cell_color = 'nr_genes', color_as_factor = F,
           save_param = list(save_name = '2_e_nr_genes'))

Dimension reduction and clustering

Highly variable genes are identified and used for dimension reduction using PCA and UMAP/tSNE routine. Spatial networks are then created with nearest neighboring spots, followed by Leiden clustering on the resulting spatial network.

## Identify highly variable genes
gg1 <- calculateHVG(gobject = gg1,
                    save_param = list(save_name = '3_a_HVGplot'))

## return_plot = TRUE and return_gobject = TRUE 
## 
##           plot will not be returned to object, but can still be saved with save_plot = TRUE or manually
gene_metadata = fDataDT(gg1)
featgenes = gene_metadata[hvg == 'yes' & perc_cells > 3 & mean_expr_det > 0.4]$gene_ID

## Run PCA analysis
gg1 <- runPCA(gobject = gg1, 
              genes_to_use = featgenes, 
              scale_unit = F, center = T, 
              method="factominer")
## a custom vector of genes will be used to subset the matrix
screePlot(gg1, ncp = 30, save_param = list(save_name = '3_b_screeplot'))
## PCA with name:  pca  already exists and will be used for the screeplot

plotPCA(gobject = gg1,
        save_param = list(save_name = '3_c_PCA_reduction'))

## Run UMAP and tSNE on PCA space, and plot the output
gg1 <- runUMAP(gg1, dimensions_to_use = 1:10)
plotUMAP(gobject = gg1,
         save_param = list(save_name = '3_d_UMAP_reduction'))

gg1 <- runtSNE(gg1, dimensions_to_use = 1:10)
plotTSNE(gobject = gg1,
         save_param = list(save_name = '3_e_tSNE_reduction'))

## Create nearest neighbor network
gg1 <- createNearestNetwork(gobject = gg1, dimensions_to_use = 1:10, k = 15)


## Perform Leiden clustering of spots
gg1 <- doLeidenCluster(gobject = gg1, resolution = 0.15, n_iterations = 1000)
plotUMAP(gobject = gg1,
         cell_color = 'leiden_clus', show_NN_network = T, point_size = 2,
         save_param = list(save_name = '4_a_UMAP_leiden.r0.15'))

spatPlot(gg1, cell_color = 'leiden_clus', point_size=2.5, show_image=F,
         save_param = list(save_name = '4_b_leiden.r0.15'))

## Co-representation of UMAP and spatial positions of spots, plotting Leiden clusters and gene number
spatDimPlot(gobject = gg1, cell_color = 'leiden_clus',
            dim_point_size = 2, spat_point_size = 2,
            save_param = list(save_name = '5_a_covis_leiden_r0.15', base_width=7,base_height=12))

spatDimPlot(gobject = gg1, cell_color = 'nr_genes', color_as_factor = F,
            dim_point_size = 2, spat_point_size = 2,
            save_param = list(save_name = '5_b_nr_genes', base_width=7,base_height=12))

## First produce the signature matrix from an existing single cell sequencing data set.

# Read in the single cell matrix
single_cell_DT = fread("/data/Giotto_AD_codes/ST_data/zeisel_sc_data/zeisel_gnxp_norm_matched_gene_PC.txt")
single_cell_matrix = Giotto:::dt_to_matrix(single_cell_DT)

# Read in the single cell annotation vector
z.cells1 = readRDS("/data/Giotto_AD_codes/ST_data/zeisel_sc_data/zeisel_meta_no_outliers_gene.rds")
cell_annotations = as.vector(z.cells1[,"celltypes_main"])

# Create a signature matrix 
rank_matrix = makeSignMatrixRank(sc_matrix = single_cell_matrix, sc_cluster_ids = cell_annotations)

# Compute cell type enrichment for each spatial spot
gg1 = runSpatialEnrich(gg1, sign_matrix = rank_matrix, enrich_method = 'rank')  

# Define cell type subset of interest and plot the enrichment scores
cell_types_subset = colnames(gg1@spatial_enrichment$rank)[c(2:4,6,7)]
spatCellPlot(gobject = gg1,
             spat_enr_names = 'rank',cell_annotation_values = cell_types_subset,
             cow_n_col = 2,coord_fix_ratio = NULL, point_size = 1.25,
             save_param = list(save_name="7_b_spatcellplot_1_rank"))

## To create individual cell type enrichment plots, add enrichment scores to metadata and plot each cell type
pDataDT(gg1)
##             cell_ID nr_genes perc_genes total_expr leiden_clus
##    1: B05_D2__20_25     3164   20.99536   3913.770           1
##    2:  B05_D2__16_9     3432   22.77372   3953.507           3
##    3:  B05_D2__6_28     4519   29.98673   4192.702           1
##    4:  B05_D2__19_4     4346   28.83875   4281.859           1
##    5: B05_D2__17_20     6558   43.51692   4737.481           2
##   ---                                                         
## 1994: N04_D1__12_26     6995   46.41672   4692.221           4
## 1995: N04_D1__27_18     4886   32.42203   4507.649           2
## 1996: N04_D1__13_27     5670   37.62442   4562.755           4
## 1997: N04_D1__28_21     6193   41.09489   4641.090           2
## 1998: N04_D1__19_30     5073   33.66291   4412.915           2
d.rank = data.frame(gg1@spatial_enrichment$rank)[,cell_types_subset]
gg1 = addCellMetadata(gg1,new_metadata=d.rank)

for (ct in cell_types_subset){
  nm = paste0("rank_",ct)
  spatPlot2D(gobject = gg1, point_alpha = 1, point_size=2.5, show_image=F,
             cell_color = ct, color_as_factor = F,
             save_param = list(save_name = nm))
}

## Create and plot spatial network
gg1 <- createSpatialNetwork(gobject = gg1, 
                            method = 'kNN', k = 8, 
                            maximum_distance_knn = 3, 
                            name = 'spatial_network')

showNetworks(gg1)
## The following images are available:  spatial_network
## [1] "spatial_network"
spatPlot(gobject = gg1, show_network = T, point_size = 2,
         network_color = 'blue', spatial_network_name = 'spatial_network',
         save_param = list(save_name = '9_a_knn_network'))

## Identify spatial genes using rank binarization, and plot several top scoring genes
ranktest = binSpect(gg1, bin_method = 'rank', 
                    calc_hub = T, hub_min_int = 5,
                    spatial_network_name = 'spatial_network')
## 
##  This is the single parameter version of binSpect
##  1. matrix binarization complete 
## 
##  2. spatial enrichment test completed 
## 
##  3. (optional) average expression of high expressing cells calculated 
## 
##  4. (optional) number of high expressing cells calculated
spatGenePlot(gg1, expression_values = 'scaled',
             genes = ranktest$genes[1:6], cow_n_col = 2, point_size = 1.5,
             save_param = list(save_name = '10_b_spatial_genes_rank'))

## Identify sets of spatially co-expressed genes
ext_spatial_genes = ranktest[1:300,]$gene
spat_cor_netw_DT = detectSpatialCorGenes(gg1, 
                                         method = 'network', 
                                         spatial_network_name = 'spatial_network', 
                                         subset_genes = ext_spatial_genes)

## Cluster spatially co-expressed genes
spat_cor_netw_DT = clusterSpatialCorGenes(spat_cor_netw_DT, name = 'spat_netw_clus', k = 10)

## Visualize spatial clusters
heatmSpatialCorGenes(gg1, 
                     spatCorObject = spat_cor_netw_DT, 
                     use_clus_name = 'spat_netw_clus', 
                     heatmap_legend_param = list(title = NULL, top_annotation = "Coexpr.clust"), 
                     save_param = list(save_name="10_c_heatmap_v300.10",
                                       base_height = 12, base_width = 13, units = 'cm'))

## Extract and organize clusters of spatially co-expressed genes
table(spat_cor_netw_DT$cor_clusters$spat_netw_clus)
## 
##  1  2  3  4  5  6  7  8  9 10 
## 36 40 47 25 22 43 23 34 13 17
coexpr_dt = data.table::data.table(genes = names(spat_cor_netw_DT$cor_clusters$spat_netw_clus),
                                   cluster = spat_cor_netw_DT$cor_clusters$spat_netw_clus)
data.table::setorder(coexpr_dt, cluster)


## Read in the list of PIG genes from Chen et al, 2020
pig = toupper(readLines("/data/Giotto_AD_codes/ST_data/PIGgenes_Chen2020.txt"))

## Compute overlaps between PIG genes and spatial gene clusters
pig.overlaps = data.frame()
for (i in min(coexpr_dt[,"cluster"]):max(coexpr_dt[,"cluster"])){
  pig.overlaps[i,"clust.size"] = coexpr_dt[cluster==i,length(genes)]
  pig.overlaps[i,"overlap"] = length(intersect(pig,coexpr_dt[cluster==i,][["genes"]]))
}

pig.overlaps
##    clust.size overlap
## 1          36       0
## 2          40       0
## 3          47       0
## 4          25       0
## 5          22       0
## 6          43      20
## 7          23       0
## 8          34       0
## 9          13       0
## 10         17       0
## Add the mean scaled expression of genes from a relevant cluster to metadata, and plot it
sig6 = coexpr_dt[cluster==6,genes]
spat.clust.6.mean = apply(gg1@norm_scaled_expr[sig6,],2,mean)
gg1 = addCellMetadata(gg1,new_metadata=spat.clust.6.mean)

spatPlot2D(gobject = gg1, point_alpha = 0.7, point_size=2.5, show_image=F,
           cell_color = 'spat.clust.6.mean', color_as_factor = F,
           save_param = list(save_name = '11_sp.clust.6.mean'))